---
title: "Enclave Effect on County-Level Turnout"
author: "melissa_barales"
output: pdf_document
---

```{r}
# Folder that holds MergedData.rds and the CSV inputs read further down.
census_dir <- path.expand("~/Desktop/Senior Thesis/Census")

# knitr restores the working directory at the end of every chunk, so a bare
# setwd() here would not carry over to the later chunks that read the input
# files when the document is knitted. Setting the knitr root.dir option makes
# every subsequent chunk evaluate inside census_dir.
# NOTE(review): RStudio's notebook mode is documented to honour root.dir only
# from a chunk labelled `setup` -- confirm if chunks are run inline.
knitr::opts_knit$set(root.dir = census_dir)

# Keep the original behaviour for interactive, console-style execution.
if (interactive()) {
  setwd(census_dir)
}
```

```{r}
library(haven)
library(dplyr)
library(tidyverse)
```

```{r}
library(stargazer)
```

```{r}
library(ggplot2)
library(magrittr)
library(broom)
library(sandwich)
library(coefplot)
```

```{r}
# Read the county voter turnout data from its RDS file into `dat`.
dat <- readRDS(file = "MergedData.rds")
```

```{r}
# Read the crosswalk CSV into `crosswalk`.
crosswalk <- read_csv(file = "geocorr2014_2201001592.csv")
```

```{r}
# Build the PUMA identifier `puma_2` by joining `state` and `puma12` with a
# hyphen. remove = FALSE keeps both source columns in the crosswalk.
crosswalk <- unite(
  crosswalk,
  col = "puma_2",
  state, puma12,
  sep = "-",
  remove = FALSE
)
```

```{r}
# Read the dataset of all Hispanic enclaves.
Hisp_enclaves <- read.csv(file = "Hisp_enclaves.csv")
```

```{r}
# Flag each crosswalk row by whether its PUMA appears in the enclave dataset:
#   1 - the row's puma_2 is listed in Hisp_enclaves (lives in an enclave)
#   0 - it is not listed (does not live in an enclave)
# %in% never yields NA, so the logical converts cleanly to a 1/0 numeric.
crosswalk_merged <- mutate(
  crosswalk,
  enclave_res = as.numeric(puma_2 %in% Hisp_enclaves$puma_2)
)
```

```{r}
# Count crosswalk rows by enclave flag (0 vs 1) as a quick sanity check.
table(crosswalk_merged[["enclave_res"]])
```

```{r}
# Attach `prop` (the share of a PUMA's population that is made up of a
# Hispanic enclave) to the flagged crosswalk. Only the key and `prop` are kept
# from the enclave table so the join adds a single column.
Hisp_enclaves <- select(Hisp_enclaves, puma_2, prop)

crosswalk_merged <- left_join(
  crosswalk_merged,
  Hisp_enclaves,
  by = "puma_2"
)
```

```{r}
# Crosswalk rows whose PUMA had no match in the enclave table come out of the
# left join with prop = NA; recode those to 0.
crosswalk_merged$prop <- replace(
  crosswalk_merged$prop,
  is.na(crosswalk_merged$prop),
  0
)
```

```{r}
# Spot check for Maricopa County (county14 == 4013): the share of the county's
# population residing within an enclave, i.e. sum(pop14 * prop) / sum(pop14)
# over the county's crosswalk rows.
Maricopa <- crosswalk_merged %>%
  subset(county14 == 4013) %>%
  mutate(enclave_pop = pop14 * prop) %>%
  group_by(county14) %>%
  summarise(county_prop = sum(enclave_pop) / sum(pop14))
```

```{r}
# Same calculation for every county in the crosswalk: weight each row's
# population by its enclave share, then divide the county total of that
# enclave population by the county total of pop14.
crosswalk_pop <- summarise(
  group_by(
    mutate(crosswalk_merged, enclave_pop = pop14 * prop),
    county14
  ),
  county_prop = sum(enclave_pop) / sum(pop14)
)
```

```{r}
# Second spot check, for county14 == 6013 (named "Contra" in this script):
# recompute the enclave population share for that single county.
Contra <- crosswalk_merged %>%
  subset(county14 == 6013) %>%
  mutate(enclave_pop = pop14 * prop) %>%
  group_by(county14) %>%
  summarise(county_prop = sum(enclave_pop) / sum(pop14))
```

```{r}
# Bring the county-level enclave share back onto the crosswalk, matching on
# county14.
crosswalk <- left_join(crosswalk, crosswalk_pop, by = "county14")
```

```{r}
# Keep only the first crosswalk row for each county14 so every county appears
# once; .keep_all = TRUE retains the remaining columns of that row.
crosswalk <- distinct(crosswalk, county14, .keep_all = TRUE)
```

```{r}
# Reduce the crosswalk to the county identifier (county14) and the county
# enclave share (county_prop).
crosswalk_1 <- select(crosswalk, county14, county_prop)
```

**Merging county prop into the voter turnout file**

```{r}
# Copy FIPS into a column named "county14" so the turnout data shares a join
# key with the crosswalk (the original FIPS column is dropped right after).
dat <- mutate(dat, county14 = FIPS)
```

```{r}
# Drop the FIPS column now that county14 holds the same values.
dat <- select(dat, -FIPS)
```

```{r}
# Add county_prop from crosswalk_1 to the turnout data, matching on county14.
# A left join keeps every row of `dat`; unmatched rows get county_prop = NA.
dat <- left_join(dat, crosswalk_1, by = "county14")
```

```{r}
# Rescale med_hh_income by dividing by 100,000.
# Note: re-running this chunk divides the column again.
dat[["med_hh_income"]] <- dat[["med_hh_income"]] / 1e5
```

```{r}
# Rescale total_population by dividing by 1,000,000.
# Note: re-running this chunk divides the column again.
dat[["total_population"]] <- dat[["total_population"]] / 1e6
```

```{r}
# Distribution of the county enclave share after the merge (including the
# count of NAs from counties without a crosswalk match).
summary(dat[["county_prop"]])
```

**OLS REGRESSIONS OF ENCLAVE POPULATION ON COUNTY-LEVEL TURNOUT**

**2020 Turnout**
```{r}
# OLS for the 2020 section: turnout_pop_20 regressed on the county enclave
# share, median household income and total population.
turnout <- lm(
  turnout_pop_20 ~ county_prop + med_hh_income + total_population,
  data = dat
)
```

```{r}
# Coefficients, standard errors and fit statistics for `turnout`.
summary(object = turnout)
```

```{r echo=FALSE, results='asis'}
# This chunk is emitted with results='asis', so whatever stargazer prints is
# passed straight into the document. A plain-text table would then be
# re-parsed as markdown (its ===/--- rules are read as headings) and come out
# garbled in the PDF, so emit LaTeX when knitting to a LaTeX target and keep
# the readable text table everywhere else (e.g. interactive runs).
# header = FALSE suppresses stargazer's package banner in the LaTeX output.
stargazer(
  turnout,
  type = if (knitr::is_latex_output()) "latex" else "text",
  header = FALSE
)
```

```{r}
# Same 2020 specification with state fixed effects: as.factor(State) adds one
# indicator per state level.
turnout_state1 <- lm(
  turnout_pop_20 ~ county_prop + med_hh_income + total_population +
    as.factor(State),
  data = dat
)
```

```{r}
# Full regression output for the 2020 state fixed-effects model.
summary(object = turnout_state1)
```

```{r}
# Plain-text regression table for the 2020 state fixed-effects model.
stargazer(
  turnout_state1,
  type = "text"
)
```

```{r}
# Alternative 2020 outcome: turnout_20 regressed on the same three covariates.
turnout_2 <- lm(
  turnout_20 ~ county_prop + med_hh_income + total_population,
  data = dat
)
```

```{r}
# Coefficients, standard errors and fit statistics for `turnout_2`.
summary(object = turnout_2)
```

```{r echo=FALSE, results='asis'}
# This chunk is emitted with results='asis', so whatever stargazer prints is
# passed straight into the document. A plain-text table would then be
# re-parsed as markdown and come out garbled in the PDF, so emit LaTeX when
# knitting to a LaTeX target and keep the text table everywhere else.
# header = FALSE suppresses stargazer's package banner in the LaTeX output.
stargazer(
  turnout_2,
  type = if (knitr::is_latex_output()) "latex" else "text",
  header = FALSE
)
```

**2016 Turnout**
```{r}
# OLS for the 2016 section: turnout_pop_16 regressed on the county enclave
# share, median household income and total population.
turnout_3 <- lm(
  turnout_pop_16 ~ county_prop + med_hh_income + total_population,
  data = dat
)
```

```{r}
# Coefficients, standard errors and fit statistics for `turnout_3`.
summary(object = turnout_3)
```

```{r echo=FALSE, results='asis'}
# This chunk is emitted with results='asis', so whatever stargazer prints is
# passed straight into the document. A plain-text table would then be
# re-parsed as markdown and come out garbled in the PDF, so emit LaTeX when
# knitting to a LaTeX target and keep the text table everywhere else.
# header = FALSE suppresses stargazer's package banner in the LaTeX output.
stargazer(
  turnout_3,
  type = if (knitr::is_latex_output()) "latex" else "text",
  header = FALSE
)
```

```{r}
# Same 2016 specification with state fixed effects: as.factor(State) adds one
# indicator per state level.
turnout_state2 <- lm(
  turnout_pop_16 ~ county_prop + med_hh_income + total_population +
    as.factor(State),
  data = dat
)
```

```{r}
# Full regression output for the 2016 state fixed-effects model.
summary(object = turnout_state2)
```
```{r}
# Alternative 2016 outcome: turnout_16 regressed on the same three covariates.
turnout_4 <- lm(
  turnout_16 ~ county_prop + med_hh_income + total_population,
  data = dat
)
```

```{r}
# Coefficients, standard errors and fit statistics for `turnout_4`.
summary(object = turnout_4)
```

```{r echo=FALSE, results='asis'}
# This chunk is emitted with results='asis', so whatever stargazer prints is
# passed straight into the document. A plain-text table would then be
# re-parsed as markdown and come out garbled in the PDF, so emit LaTeX when
# knitting to a LaTeX target and keep the text table everywhere else.
# header = FALSE suppresses stargazer's package banner in the LaTeX output.
stargazer(
  turnout_4,
  type = if (knitr::is_latex_output()) "latex" else "text",
  header = FALSE
)
```

```{r}
# Side-by-side table of the 2020 (`turnout`) and 2016 (`turnout_3`) models.
# The table is printed as text and also written to county_turnout.htm.
# covariate.labels are applied in model-term order: county_prop,
# med_hh_income, total_population. The income label previously read
# "Houehold"; the typo is corrected so it no longer appears in the table.
stargazer(
  turnout, turnout_3,
  dep.var.labels = c("2020 Voter Turnout", "2016 Voter Turnout"),
  covariate.labels = c(
    "Proportion of Enclave Population",
    "Median Household Income",
    "County Population"
  ),
  align = TRUE,
  title = "Effect of Enclave Population on County Voter Turnout",
  type = "text",
  out = "county_turnout.htm"
)
```

```{r}
# Help lookup for multiplot(), kept for interactive exploration only: opening
# a help page is meaningless when the document is knitted, so skip it there.
if (interactive()) {
  help("multiplot")
}
```

```{r}
# Figure 1: coefficient plot comparing the two baseline OLS models.
# Fixes relative to the previous version:
#   * the call was to `countrymultiplot()`, which is not defined anywhere in
#     this document; every argument used here belongs to coefplot's
#     multiplot(), which is the function looked up just above.
#   * `names` labels the models in the order they are passed. `turnout` is
#     the 2020 model (turnout_pop_20) and `turnout_3` the 2016 model
#     (turnout_pop_16), so the labels must be 2020 first, then 2016; they were
#     previously reversed, mislabelling both series in the legend.
# newNames maps raw term names to display labels; intercept = FALSE leaves the
# constant out of the plot.
multiplot(
  turnout, turnout_3,
  innerCI = 2,
  title = "Figure 1: Effect of Enclave Population on County-Level Turnout",
  xlab = "Turnout Likelihood (0-1)",
  ylab = "Coefficient",
  newNames = c(
    total_population = "Total Population",
    med_hh_income = "Median Income",
    county_prop = "Enclave Population"
  ),
  decreasing = TRUE,
  names = c("2020 Election", "2016 Election"),
  intercept = FALSE
)
```

```{r}
# Save the county turnout coefficient plot to "county voter turnout.png".
# The previous call, multiplot(p1, p2, p3, p4, cols=2), could not run: p1-p4
# are never created in this document and `cols` is not an argument of
# coefplot's multiplot(). The figure is rebuilt here from the two baseline
# models so this chunk does not depend on any other chunk having stored it.
# NOTE(review): assumes the intended image is the Figure 1 coefficient plot --
# confirm.
turnout_plot <- multiplot(
  turnout, turnout_3,
  innerCI = 2,
  title = "Figure 1: Effect of Enclave Population on County-Level Turnout",
  xlab = "Turnout Likelihood (0-1)",
  ylab = "Coefficient",
  newNames = c(
    total_population = "Total Population",
    med_hh_income = "Median Income",
    county_prop = "Enclave Population"
  ),
  decreasing = TRUE,
  # Labels follow model order: `turnout` is 2020, `turnout_3` is 2016.
  names = c("2020 Election", "2016 Election"),
  intercept = FALSE
)

png("county voter turnout.png")
# A ggplot object is only drawn on the open device when printed explicitly.
print(turnout_plot)
dev.off()
```

```{r}
# Side-by-side table of the 2020 and 2016 state fixed-effects models, printed
# as text and written to county_turnout_ws.htm.
# Fixes relative to the previous version:
#   * covariate.labels are applied to coefficients in order, so a fourth label
#     "State Fixed Effects" was attached to the first state indicator only,
#     while the remaining state dummies were listed under their raw names.
#     The state indicators are now dropped from the table with `omit` (a
#     regular expression matched against term names; only the
#     as.factor(State) terms contain "State") and reported through a single
#     "State Fixed Effects: Yes / Yes" row via `add.lines`.
#   * "Houehold" typo in the income label corrected.
stargazer(
  turnout_state1, turnout_state2,
  dep.var.labels = c("2020 Voter Turnout", "2016 Voter Turnout"),
  covariate.labels = c(
    "Proportion of Enclave Population",
    "Median Household Income",
    "County Population"
  ),
  omit = "State",
  add.lines = list(c("State Fixed Effects", "Yes", "Yes")),
  align = TRUE,
  title = "Effect of Enclave Population on County Voter Turnout",
  type = "text",
  out = "county_turnout_ws.htm"
)
```








